* Reset settings and initialize log file
* NOTE(review): launch is a project-defined command that is not shown in this
* file; presumably path() identifies the log/output location for this script.
* Confirm against the definition of launch.
launch, path("share/margins")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Decompose changes in aggregate hours into extensive and intensive margins.
*-------------------------------------------------------------------------------


* Prepare data
*-------------------------------------------------------------------------------

* Read the estimation sample and retain only the variables used below
gzuse "$basepath/data/derived/cps_bms_sample.dta.gz", clear
keep pid tm year month female emp hours wtraked linked_monthly

* Require a complete May-June-July spell for the person within the year
drop if !inlist(month, 5, 6, 7)
bysort pid year (month): drop if _N != 3

* Require a valid longitudinal link on the second and third records of the
* spell (June and July, given the sort on month)
bysort pid year (month): drop if !(linked_monthly[2] == 1 & linked_monthly[3] == 1)

* Only the endpoints (May and July) are compared, so discard June and declare
* a panel whose time step is two months
drop if month == 6
tsset pid tm, delta(2)

* Classify each record into one of three states:
*   atwork - employed with nonzero hours
*   absent - employed with zero hours
*   nonemp - not employed
* NOTE(review): hours != 0 is also true when hours is missing, so an employed
* record with missing hours is classified as atwork. Confirm that hours is
* never missing for the employed in this sample.
gen byte atwork = (emp == 1) & (hours != 0)
gen byte absent = (emp == 1) & (hours == 0)
gen byte nonemp = (emp == 0)


* Define gross flows
*-------------------------------------------------------------------------------

* Label every July record with its May-to-July transition, written as
* <origin>_<destination>; the origin state is read from the one-period lag
local states atwork absent nonemp
gen status = ""
foreach o of local states {
	foreach d of local states {
		replace status = "`o'_`d'" if month == 7 & L.`o' == 1 & `d' == 1
	}
}

* Hours at the start (May, via the lag) and at the end (July) of the window,
* both stored on the July record
gen h0 = cond(month == 7, L.hours, .)
gen h1 = cond(month == 7, hours, .)

* The July record now carries everything needed: one row per person x year
drop if month != 7

* Weighted sums of May and July hours, plus the sum of weights (pop), for each
* sex x origin x destination cell
gcollapse (sum) h0 h1 (rawsum) pop = wtraked [pw = wtraked], by(female status)

* Sex-level totals: population (totpop), May hours (toth0), July hours (toth1)
foreach v in pop h0 h1 {
	bysort female (status): egen tot`v' = total(`v')
}

* Contribution of each cell to the absolute change in per capita hours
gen dhlvl = (h1 - h0)/totpop

* Contribution of each cell to the percent change in hours, relative to the
* sex-level May total
gen dhpct = 100 * (h1 - h0)/toth0

* Cells that never involve the at-work state must contribute nothing; verify
* this and then discard them
assert dhlvl == 0 if strpos(status, "atwork") == 0
drop if strpos(status, "atwork") == 0

* Pool the two directions of each flow so that contributions are net changes
replace status = "atwork <=> absent" if strpos(status, "atwork") & strpos(status, "absent")
replace status = "atwork <=> nonemp" if strpos(status, "atwork") & strpos(status, "nonemp")
replace status = "atwork <=> atwork" if status == "atwork_atwork"
gcollapse (sum) dhlvl dhpct, by(female status)

* Duplicate every row, relabel the copies as "total", and sum again so that
* each sex gains a row holding the overall change
expand 2, generate(is_total)
replace status = "total" if is_total == 1
gcollapse (sum) dhlvl dhpct, by(female status)

* Organize the results: one row per component, with separate columns by sex
* (suffix 1 for female == 1, suffix 0 for female == 0)
reshape wide dhlvl dhpct, i(status) j(female)

* Put the rows in presentation order: total, extensive margin, absences, stayers
gen k = .
replace k = 1 if status == "total"
replace k = 2 if status == "atwork <=> nonemp"
replace k = 3 if status == "atwork <=> absent"
replace k = 4 if status == "atwork <=> atwork"
sort k
drop k

* The macros below are read by row position. If any component were missing
* from the data, the rows would shift and the table would be silently
* mislabelled, so verify the layout before relying on it.
assert _N == 4
assert status[1] == "total"
assert status[2] == "atwork <=> nonemp"
assert status[3] == "atwork <=> absent"
assert status[4] == "atwork <=> atwork"

* Show the results
order status dhlvl1 dhpct1 dhlvl0 dhpct0
format %9.1f dh*
list

* Store the results in macros named f<sex>_<lvl|pct>_<component>, formatted to
* one decimal place
foreach f of numlist 0 1 {
	foreach v in "lvl" "pct" {
		local f`f'_`v'_overall: display %4.1f dh`v'`f'[1]
		local f`f'_`v'_extensive: display %4.1f dh`v'`f'[2]
		local f`f'_`v'_absence: display %4.1f dh`v'`f'[3]
		local f`f'_`v'_stayers: display %4.1f dh`v'`f'[4]
	}
}


* Create table
*-------------------------------------------------------------------------------
* Writes a LaTeX tabular with one column pair (level change, percent change)
* for women (f1 macros) followed by one for men (f0 macros).

* Release the handle if an earlier, interrupted run left it open; otherwise
* the file open below would fail because the handle name is already in use
capture file close hours_decomp

* Write table header
file open hours_decomp using "$basepath/output/margins.tex", write replace
file write hours_decomp "\begin{tabular}{lrrrr}"  _n
file write hours_decomp "\toprule" _n
file write hours_decomp "& \multicolumn{2}{c}{\; \textbf{Women}} & \multicolumn{2}{c}{\; \textbf{Men}} \\ " _n
file write hours_decomp "\textbf{Change in hours worked during reference week} &  {\centering \quad \textbf{$\Delta$}} & {\centering \; \textbf{\%$\Delta$}} &  {\centering \quad \textbf{$\Delta$}} &  {\centering \; \textbf{ \%$\Delta$}} \\ " _n
file write hours_decomp "\midrule \\[-0.5em]" _n

* Write table body: overall change
file write hours_decomp "\emph{Total change from May to July:} \\ "_n

file write hours_decomp "\quad (1) & "
file write hours_decomp "`f1_lvl_overall' & "
file write hours_decomp "`f1_pct_overall' & "
file write hours_decomp "`f0_lvl_overall' & "
file write hours_decomp "`f0_pct_overall' \\ \\ " _n

* Write table body: extensive margin
file write hours_decomp "\emph{Contribution from extensive margin:} \\ "_n

file write hours_decomp "\quad (2) Employed, at work $\longleftrightarrow$ not employed & "
file write hours_decomp "`f1_lvl_extensive' & "
file write hours_decomp "`f1_pct_extensive' & "
file write hours_decomp "`f0_lvl_extensive' & "
file write hours_decomp "`f0_pct_extensive' \\ \\ " _n

* Write table body: intensive margin
file write hours_decomp "\emph{Contribution from intensive margin:} \\ "_n

file write hours_decomp "\quad (3) Employed, at work $\longleftrightarrow$ employed, absent & "
file write hours_decomp "`f1_lvl_absence' & "
file write hours_decomp "`f1_pct_absence' & "
file write hours_decomp "`f0_lvl_absence' & "
file write hours_decomp "`f0_pct_absence' \\ " _n

file write hours_decomp "\quad (4) $\Delta$ among those employed, at work & "
file write hours_decomp "`f1_lvl_stayers' & "
file write hours_decomp "`f1_pct_stayers' & "
file write hours_decomp "`f0_lvl_stayers' & "
file write hours_decomp "`f0_pct_stayers' \\[0.5em] " _n

* Write table footer
file write hours_decomp "\bottomrule" _n
file write hours_decomp "\end{tabular}"
file close hours_decomp

* Close the log file
* NOTE(review): unlaunch is a project-defined command that is not shown in this
* file; presumably it is the counterpart of the launch call at the top of the
* script. Confirm against its definition.
unlaunch
